def clean(file):
    """Write a de-duplicated copy of a line-based dictionary file.

    Each line of ``file`` is stripped of surrounding whitespace. Blank lines
    and lines that were already seen are dropped; the remaining lines are
    written in their original order, one per line, to a sibling file whose
    name has ``_clean`` inserted (``raw.dic`` -> ``raw_clean.dic``). The
    number of dropped lines is printed.

    Args:
        file: Path of the UTF-8 encoded input file.

    Returns:
        The number of lines dropped (blank or duplicate).
    """
    import os

    file = os.fspath(file)

    # Rename only the final path component. A blanket str.replace over the
    # whole path would also rewrite directory names containing '.dic', and
    # would leave the path unchanged for other extensions, so that opening
    # the "output" in 'w' mode truncated the input before it was read.
    name = os.path.basename(file)
    parent = file[:len(file) - len(name)]
    stem, marker, rest = name.rpartition('.dic')
    if marker:
        clean_name = stem + '_clean.dic' + rest
    else:
        stem, ext = os.path.splitext(name)
        clean_name = stem + '_clean' + ext
    target = parent + clean_name

    pool = set()
    count = 0
    # Context managers close both handles even if reading or writing fails.
    with open(file, 'r', encoding='utf-8') as source, \
            open(target, 'w', encoding='utf-8') as output:
        for line in source:
            line = line.strip()
            if not line or line in pool:
                count += 1
                continue
            pool.add(line)
            output.write(line + '\n')
    print(count)
    return count

# Run the de-duplication over each raw dictionary, in this fixed order.
for raw_dictionary in (
    '第一次维基应用科学/raw.dic',
    '互动百科/raw.dic',
    '沪深美港上市公司/raw.dic',
    '维基重定向/raw.dic',
):
    clean(raw_dictionary)
